# -*- coding=utf-8 -*-

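# Province data source: https://db.yaozh.com/hmap
# The goal of this code is to find out how many hospitals each province has.
# This Python file fetches the hospital total for a single province or municipality (e.g. 天津市).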

'''
换个思路：先获取每个省的总数，然后根据总数，拿到所有的医院的单个页，然后查看一共有多少个医院
此例子是获得所有省市的名字
并且是用requests来替换（感觉下来requests的效率更好）
'''

import requests
import re
import urllib.parse

def get_regex_msg(str,regex_pattern):
    """Return all non-overlapping matches of ``regex_pattern`` in ``str``.

    The result is exactly what ``re.findall`` yields: a list of strings when
    the pattern has zero or one capture group, or a list of tuples when it has
    several groups. An empty list is returned when nothing matches.

    Note: the first parameter is named ``str`` and therefore hides the builtin
    of the same name inside this function; the name is kept because it is part
    of the call interface.
    """
    return re.findall(regex_pattern, str)

def request_web_body(url, timeout=30):
    """Download ``url`` and extract the province ``data-list`` fragments.

    Args:
        url: Address of the page to download.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the caller forever.

    Returns:
        A list of strings, one per occurrence of the text found between
        ``data-names="province" data-list=`` and ``data-src="">`` in the
        decoded page. The list is empty when the marker is absent.
    """
    # Raw string: "\s" / "\S" are regex classes, not Python string escapes.
    # A non-raw literal triggers an invalid-escape warning on newer Pythons.
    regex_rule = r'data-names="province" data-list=([\s\S]*?)data-src="">'
    # NOTE(review): verify=False turns off TLS certificate verification.
    # Kept as in the original; confirm whether it is still required.
    response = requests.get(url, verify=False, timeout=timeout)
    # The raw bytes are decoded with "unicode-escape" -- presumably because
    # the page embeds \uXXXX-escaped text; confirm against a live response.
    html_body_source = response.content.decode("unicode-escape")
    return get_regex_msg(html_body_source, regex_rule)

def generate_province_list(province_msg):
    """Extract province names from a province ``data-list`` fragment.

    Args:
        province_msg: Text containing entries shaped like
            ``"name":"<province>","val"``.

    Returns:
        A list with every captured name except the first one. The list is
        empty when the text holds zero or one entry.
    """
    # Raw string: "\s" / "\S" are regex classes, not Python string escapes.
    pat_province_name = r'"name":"([\s\S]*?)","val"'
    names = re.findall(pat_province_name, province_msg)
    # The first entry is skipped, exactly as the original index-1 loop did --
    # presumably it is a placeholder option rather than a province; confirm.
    return names[1:]

def get_province_hospital_count(province_name, timeout=30):
    """Query the hospital listing for one province and return its total.

    Builds the ``hmap`` listing URL for ``province_name`` (grade and type both
    set to "全部", page 1, page size 30), downloads it, and passes the page
    text to ``get_province_obj_count``.

    Args:
        province_name: Province name as plain text; it is percent-encoded
            before being placed in the query string.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the caller forever.

    Returns:
        Whatever ``get_province_obj_count`` extracts from the page text.

    Side effects:
        Prints the request URL and the full response text, and disables
        urllib3 warnings for the process.
    """
    encoded_province = urllib.parse.quote(province_name)
    grade = urllib.parse.quote("全部")
    # Named hospital_type (not ``type``) to avoid hiding the builtin.
    hospital_type = urllib.parse.quote("全部")
    page = 1
    pagesize = 30
    base_url = "https://db.yaozh.com/"
    param = "hmap?grade=%s&p=%d&pageSize=%d&province=%s&type=%s" % (
        grade, page, pagesize, encoded_province, hospital_type)
    target_url = base_url + param
    print(target_url)
    # Silence the InsecureRequestWarning that verify=False would emit.
    requests.packages.urllib3.disable_warnings()
    # NOTE(review): verify=False turns off TLS certificate verification.
    # Kept as in the original; confirm whether it is still required.
    response = requests.get(target_url, verify=False, timeout=timeout)
    page_text = response.text
    print(page_text)
    return get_province_obj_count(page_text)

def get_province_obj_count(content):
    """Return the first ``data-total`` attribute value found in ``content``.

    The value is returned as the string of digits captured from
    ``data-total="<digits>"``; it is not converted to ``int``.

    Raises:
        IndexError: if ``content`` contains no such attribute.
    """
    totals = re.findall('data-total="([0-9]+?)"', content)
    return totals[0]

if __name__ == '__main__':
    # Province whose hospital total is looked up; swap in another name
    # (for example "天津市") to query a different one.
    # To work from the site's own province list instead, feed the first item
    # of request_web_body("https://db.yaozh.com/hmap") to
    # generate_province_list and loop over the resulting names.
    province_name="江苏省"
    province_count=get_province_hospital_count(province_name)
    print(province_count)